---
title: "Replication files (I)"
output: html_document
date: "2025-01-10"
---

```{r setup, include=FALSE}
# Set the default chunk option echo = TRUE for every chunk in this document.
knitr::opts_chunk$set(echo = TRUE)
```

```{r}
library(tidyverse)
library(haven)
```

# Download data

## Download "youth main" dataset for all waves

```{r}
# Root folder of the CILS4EU Stata release. Set the CILS4EU_DIR environment
# variable to point at a local copy of the data; when it is unset or empty,
# the original author's path is used so existing runs behave as before.
cils_dir <- Sys.getenv("CILS4EU_DIR", unset = "")
if (!nzchar(cils_dir)) {
  cils_dir <- "/Users/davidsanchezperez/Library/CloudStorage/GoogleDrive-david.sanper99@gmail.com/.shortcut-targets-by-id/1JDCSnikxCn2nYUbMo-ALqZpHu7lp_Qmi/UC3M-G_EPIC/Papers/Intersectionality/CILSEU_DATA/ZA5656_CILS4EU_v3-3-0_stata"
}

# Read one file from the "youth main" sub-folder.
# `stem` is the file name without the shared "_rv.dta" suffix.
read_youth_main <- function(stem) {
  read_dta(file.path(cils_dir, "youth main", paste0(stem, "_rv.dta")))
}

# Wave 1 files (file version 1.2.0)
w1_ym_en <- read_youth_main("w1_ym_en_v1.2.0")
w1_ym_ge <- read_youth_main("w1_ym_ge_v1.2.0")
w1_ym_nl <- read_youth_main("w1_ym_nl_v1.2.0")
w1_ym_sw <- read_youth_main("w1_ym_sw_v1.2.0")

# Wave 2 files (file version 2.3.0)
w2_ym_en <- read_youth_main("w2_ym_en_v2.3.0")
w2_ym_ge <- read_youth_main("w2_ym_ge_v2.3.0")
w2_ym_nl <- read_youth_main("w2_ym_nl_v2.3.0")
w2_ym_sw <- read_youth_main("w2_ym_sw_v2.3.0")

# Wave 3 files (file version 3.3.0)
w3_ym_en <- read_youth_main("w3_ym_en_v3.3.0")
w3_ym_ge <- read_youth_main("w3_ym_ge_v3.3.0")
w3_ym_nl <- read_youth_main("w3_ym_nl_v3.3.0")
w3_ym_sw <- read_youth_main("w3_ym_sw_v3.3.0")
```

## Download "parents" datasets (only available in wave 1)

```{r}
# Root folder of the CILS4EU Stata release. Set the CILS4EU_DIR environment
# variable to point at a local copy of the data; when it is unset or empty,
# the original author's path is used so existing runs behave as before.
cils_dir <- Sys.getenv("CILS4EU_DIR", unset = "")
if (!nzchar(cils_dir)) {
  cils_dir <- "/Users/davidsanchezperez/Library/CloudStorage/GoogleDrive-david.sanper99@gmail.com/.shortcut-targets-by-id/1JDCSnikxCn2nYUbMo-ALqZpHu7lp_Qmi/UC3M-G_EPIC/Papers/Intersectionality/CILSEU_DATA/ZA5656_CILS4EU_v3-3-0_stata"
}

# Read the wave-1 parents file of one country; `cc` is the two-letter
# country suffix used in the file names ("en", "ge", "nl", "sw").
read_parents_w1 <- function(cc) {
  read_dta(file.path(cils_dir, "parents", paste0("w1_p_", cc, "_v1.2.0_rv.dta")))
}

w1_p_en <- read_parents_w1("en")
w1_p_ge <- read_parents_w1("ge")
w1_p_nl <- read_parents_w1("nl")
w1_p_sw <- read_parents_w1("sw")
```

## Select variables for wave 1

```{r}
# Keep and rename the wave-1 youth variables used later on.
#
# The four country files share every column except two:
#   * the country-of-origin variable (y1_countorig_<cc>G), which is kept under
#     a country-specific name (country_origin_<cc>);
#   * the friends item that feeds `mixed_friend`, whose variable name differs
#     per country and is passed in as `friend_item`.
#
# data        : one wave-1 "youth main" table.
# cc          : two-letter country suffix ("en", "ge", "nl", "sw").
# friend_item : name of the y1_bgfr* column to store as `mixed_friend`.
# Returns the table with a `wave` column set to 1 and only the selected,
# renamed columns, in the order listed below.
select_wave1_vars <- function(data, cc, friend_item) {
  # Named vector c(new_name = "old_name"); all_of() renames while selecting.
  origin <- setNames(
    paste0("y1_countorig_", cc, "G"),
    paste0("country_origin_", cc)
  )
  data %>%
    mutate(wave = 1) %>%
    dplyr::select(
      youthid, wave, classid, schoolid, country,
      female = y1_sex,
      immi_background = y1_generationG,
      ethnic_group = y1_idoc1RV,
      ethnic_strong = y1_idoc2,
      all_of(origin),
      nationality = y1_nationRV,
      mixed_friend = all_of(friend_item),
      discr_school = y1_pdisc1,
      discr_transport = y1_pdisc2,
      discr_shop = y1_pdisc3,
      discr_police = y1_pdisc4,
      associationism = y1_lta5,
      religious_part = y1_relb1,
      book_home = y1_books,
      mother_primary = y1_educm1,
      mother_secondary = y1_educm2,
      mother_tertiary = y1_educm3,
      father_primary = y1_educf1,
      father_secondary = y1_educf2,
      father_tertiary = y1_educf3,
      ideal_educ = y1_edasp1H,
      date_interview = y1_intdat_ymRV,
      year_born = y1_doby
    )
}

w1_ym_en <- select_wave1_vars(w1_ym_en, "en", "y1_bgfr13")
w1_ym_ge <- select_wave1_vars(w1_ym_ge, "ge", "y1_bgfr5")
w1_ym_nl <- select_wave1_vars(w1_ym_nl, "nl", "y1_bgfr3")
w1_ym_sw <- select_wave1_vars(w1_ym_sw, "sw", "y1_bgfr11")
```

## Select variables for wave 2

```{r}
# Keep and rename the wave-2 youth variables used later on.
#
# The four country files share every column except the country-of-origin
# variable (y2_countorig_<cc>G, kept as country_origin_<cc>) and the friends
# item stored as `mixed_friend`, whose variable name differs per country.
#
# data        : one wave-2 "youth main" table.
# cc          : two-letter country suffix ("en", "ge", "nl", "sw").
# friend_item : name of the y2_bgfr* column to store as `mixed_friend`.
# Returns the table with a `wave` column set to 2 and only the selected,
# renamed columns, in the order listed below.
select_wave2_vars <- function(data, cc, friend_item) {
  # Named vector c(new_name = "old_name"); all_of() renames while selecting.
  origin <- setNames(
    paste0("y2_countorig_", cc, "G"),
    paste0("country_origin_", cc)
  )
  data %>%
    mutate(wave = 2) %>%
    dplyr::select(
      youthid, wave, country,
      poliint = y2_cintsc1,
      female = y2_sex,
      immi_background = y2_generationG,
      ethnic_group = y2_idoc1RV,
      ethnic_strong = y2_idoc2,
      all_of(origin),
      mixed_friend = all_of(friend_item),
      talk_parents_pol = y2_fcomm1,
      associationism = y2_lta5,
      religious_part = y2_relb1,
      talk_family = y2_luoc1,
      talk_watch = y2_luoc4,
      talk_friend = y2_luoc5,
      mother_primary = y2_educm1,
      mother_secondary = y2_educm2,
      mother_tertiary = y2_educm3,
      father_primary = y2_educf1,
      father_secondary = y2_educf2,
      father_tertiary = y2_educf3,
      ideal_educ = y2_edasp1H,
      date_interview = y2_intdat_ymRV
    )
}

w2_ym_en <- select_wave2_vars(w2_ym_en, "en", "y2_bgfr13")
w2_ym_ge <- select_wave2_vars(w2_ym_ge, "ge", "y2_bgfr5")
w2_ym_nl <- select_wave2_vars(w2_ym_nl, "nl", "y2_bgfr3")
w2_ym_sw <- select_wave2_vars(w2_ym_sw, "sw", "y2_bgfr11")
```

## Select variables for wave 3

```{r}
# Keep and rename the wave-3 youth variables used later on.
#
# The four country files share every column except the country-of-origin
# variable (y3_countorig_<cc>G, kept as country_origin_<cc>) and the friends
# item stored as `mixed_friend`, whose variable name differs per country.
#
# data        : one wave-3 "youth main" table.
# cc          : two-letter country suffix ("en", "ge", "nl", "sw").
# friend_item : name of the y3_bgfr* column to store as `mixed_friend`.
# Returns the table with a `wave` column set to 3 and only the selected,
# renamed columns, in the order listed below.
select_wave3_vars <- function(data, cc, friend_item) {
  # Named vector c(new_name = "old_name"); all_of() renames while selecting.
  origin <- setNames(
    paste0("y3_countorig_", cc, "G"),
    paste0("country_origin_", cc)
  )
  data %>%
    mutate(wave = 3) %>%
    dplyr::select(
      youthid, wave, country,
      poliint = y3_cintsc1,
      female = y3_sex,
      immi_background = y3_generationG,
      ethnic_group = y3_idoc1RV,
      ethnic_strong = y3_idoc2,
      all_of(origin),
      nationality = y3_nationRV,
      mixed_friend = all_of(friend_item),
      associationism = y3_lta5,
      religious_part = y3_relb1,
      talk_family = y3_luoc1,
      talk_watch = y3_luoc4,
      talk_friend = y3_luoc5,
      mother_primary = y3_educm1,
      mother_secondary = y3_educm2,
      mother_tertiary = y3_educm3,
      father_primary = y3_educf1,
      father_secondary = y3_educf2,
      father_tertiary = y3_educf3,
      ideal_educ = y3_edasp1H
    )
}

w3_ym_en <- select_wave3_vars(w3_ym_en, "en", "y3_bgfr13")
w3_ym_ge <- select_wave3_vars(w3_ym_ge, "ge", "y3_bgfr5")
w3_ym_nl <- select_wave3_vars(w3_ym_nl, "nl", "y3_bgfr3")
w3_ym_sw <- select_wave3_vars(w3_ym_sw, "sw", "y3_bgfr11")
```

## Parents datasets (wave 1 only)

## Select variables for wave 1

```{r}
# Keep and rename the wave-1 parent variables used later on. The selection is
# identical for the four country files, so it lives in a single helper.
#
# data : one wave-1 "parents" table.
# Returns the table with a `wave` column set to 1 and only the selected,
# renamed columns.
select_parent_vars <- function(data) {
  data %>%
    mutate(wave = 1) %>%
    dplyr::select(
      youthid, wave, country,
      second_lang = p1_loc1,
      ideal_educ_par = p1_edasp1H,
      gender_care = p1_grol1,
      gender_cook = p1_grol2,
      gender_money = p1_grol3,
      gender_clean = p1_grol4
    )
}

w1_p_en <- select_parent_vars(w1_p_en)
w1_p_ge <- select_parent_vars(w1_p_ge)
w1_p_nl <- select_parent_vars(w1_p_nl)
w1_p_sw <- select_parent_vars(w1_p_sw)
```

## Unify into a single dataset

```{r}
# Stack the twelve youth tables (4 countries x 3 waves) into one long table
# and attach the parent variables.
#
# The four parent tables have identical columns, so they are stacked first and
# joined once on explicit keys. Chaining one left_join() per country without
# `by` does not work: after the first join the parent columns already exist in
# the left table, so every later join also keys on them (second_lang,
# gender_*, ...), matches nothing, and silently leaves the parent variables
# of the remaining countries missing.
#
# Parent rows carry wave = 1, so parent variables are attached to wave-1 youth
# rows only.
full_data <- bind_rows(
  w1_ym_en, w1_ym_ge, w1_ym_nl, w1_ym_sw,
  w2_ym_en, w2_ym_ge, w2_ym_nl, w2_ym_sw,
  w3_ym_en, w3_ym_ge, w3_ym_nl, w3_ym_sw
) %>%
  left_join(
    bind_rows(w1_p_en, w1_p_ge, w1_p_nl, w1_p_sw),
    by = c("youthid", "wave", "country")
  )
```

# Clean variables

## Clean NAs

```{r}
# Replace every negative value, in every column, with NA.
# ifelse() is kept on purpose: it also drops the column attributes, which the
# recoding steps further down rely on.
full_data <- full_data %>%
  mutate(across(everything(), ~ ifelse(.x < 0, NA, .x)))
```

## Country

```{r}
# Distribution before recoding.
table(full_data[["country"]])
# Turn the numeric country code into a labelled factor (codes 1 to 4).
full_data <- full_data %>%
  mutate(
    country = factor(
      country,
      levels = 1:4,
      labels = c("England", "Germany", "Netherlands", "Sweden")
    )
  )
# Distribution after recoding.
table(full_data[["country"]])
```

## Political interest

```{r}
# Distribution before recoding.
table(full_data[["poliint"]])
# Reverse the 1-5 scale (1 <-> 5, 2 <-> 4, 3 unchanged).
full_data <- full_data %>%
  mutate(poliint = recode(poliint, `1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1))
# Distribution after recoding.
table(full_data[["poliint"]])
```

## Female

```{r}
# Distribution before recoding.
table(full_data[["female"]])
# Dummy coding: original code 1 becomes 0, original code 2 becomes 1.
full_data <- full_data %>%
  mutate(female = recode(female, `1` = 0, `2` = 1))
# Distribution after recoding.
table(full_data[["female"]])
```

## Immigrant background recoded

```{r}
# Distribution of the original generation codes.
table(full_data[["immi_background"]])
# Collapse the generation codes into three groups; codes 15-20 (and any code
# not listed) end up as NA.
full_data <- full_data %>%
  mutate(
    immi_background_rec = case_when(
      immi_background %in% 1:4   ~ "Immigrant",
      immi_background %in% 5:8   ~ "Child of immigrants",
      immi_background %in% 9:14  ~ "Other/Native",
      immi_background %in% 15:20 ~ NA
    )
  )
table(full_data[["immi_background_rec"]])
```

## Immigrant background

```{r}
# Distribution of the original generation codes.
table(full_data[["immi_background"]])
# Overwrite the numeric generation code with a detailed five-category label.
# The code ranges are disjoint, so the order of the branches does not matter;
# codes 15-20 (and any code not listed) end up as NA.
full_data <- full_data %>%
  mutate(
    immi_background = case_when(
      immi_background %in% 1:4   ~ "First generation",
      immi_background %in% 5:7   ~ "Second generation",
      immi_background == 8       ~ "Interethnic second generation",
      immi_background %in% 9:12  ~ "Third generation and interethnic third generation",
      immi_background %in% 13:14 ~ "Natives",
      immi_background %in% 15:20 ~ NA
    )
  )
table(full_data[["immi_background"]])
```

## Country of origin

### Country of origin (England, Germany, Netherlands and Sweden)

```{r}
# Build one harmonised region-of-origin variable from the four
# country-specific origin variables. Each survey country uses its own code
# list, hence one set of branches per source column. Codes that are not
# listed in any branch end up as NA.
full_data <- full_data %>%
  mutate(
    country_origin = case_when(
      # England file
      country_origin_en == 1 ~ "Survey country",
      country_origin_en %in% c(5, 12, 16, 17, 18) ~ "Europe and North America",
      country_origin_en %in% c(6, 8, 9, 10) ~ "Africa and Middle East",
      country_origin_en %in% c(2, 3, 7, 13, 14, 15) ~ "Asia",
      country_origin_en %in% c(4, 11) ~ "Latin America and Caribbean",
      # Germany file
      country_origin_ge == 1 ~ "Survey country",
      country_origin_ge %in% c(3, 4, 5, 6, 8, 12, 16, 17, 18) ~ "Europe and North America",
      country_origin_ge %in% c(2, 7, 9, 10) ~ "Africa and Middle East",
      country_origin_ge %in% c(13, 14, 15) ~ "Asia",
      country_origin_ge %in% c(11) ~ "Latin America and Caribbean",
      # Netherlands file
      country_origin_nl == 1 ~ "Survey country",
      country_origin_nl %in% c(7, 10, 14, 15, 16) ~ "Europe and North America",
      country_origin_nl %in% c(2, 3, 8) ~ "Africa and Middle East",
      country_origin_nl %in% c(5, 11, 12, 13) ~ "Asia",
      country_origin_nl %in% c(4, 6, 9) ~ "Latin America and Caribbean",
      # Sweden file
      country_origin_sw == 1 ~ "Survey country",
      country_origin_sw %in% c(2, 3, 9, 11, 12, 13, 18, 23, 24, 25) ~ "Europe and North America",
      country_origin_sw %in% c(4, 5, 6, 7, 8, 10, 14, 15, 16) ~ "Africa and Middle East",
      country_origin_sw %in% c(19, 20, 21, 22) ~ "Asia",
      country_origin_sw %in% c(17) ~ "Latin America and Caribbean"
    )
  )
table(full_data[["country_origin"]])
```

## Nationality

```{r}
# Distribution of the original nationality codes.
table(full_data[["nationality"]])
# Replace codes 1-3 with text labels; any other value becomes NA.
# NOTE(review): the first label ends with a trailing space. It is kept as is
# because later code or files may match on the exact string.
full_data <- full_data %>%
  mutate(
    nationality = case_when(
      nationality == 1 ~ "Only survey country nationality ",
      nationality == 2 ~ "Survey country and other nationality",
      nationality == 3 ~ "Only other nationality"
    )
  )
table(full_data[["nationality"]])
```

## Mixed friends

```{r}
# Distribution before recoding.
table(full_data[["mixed_friend"]])
# Dichotomise: codes 1-3 become 1, codes 4-5 become 0, anything else NA.
full_data <- full_data %>%
  mutate(
    mixed_friend = case_when(
      mixed_friend %in% 1:3 ~ 1,
      mixed_friend %in% 4:5 ~ 0
    )
  )
# Distribution after recoding.
table(full_data[["mixed_friend"]])
```

## Political talk

```{r}
# Distribution before recoding.
table(full_data[["talk_parents_pol"]])
# Reverse the 1-5 scale (1 <-> 5, 2 <-> 4, 3 unchanged).
full_data <- full_data %>%
  mutate(
    talk_parents_pol = recode(talk_parents_pol, `1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1)
  )
# Distribution after recoding.
table(full_data[["talk_parents_pol"]])
```

## Associationism

```{r}
# Distribution before recoding.
table(full_data[["associationism"]])
# Reverse the 1-5 scale (1 <-> 5, 2 <-> 4, 3 unchanged).
full_data <- full_data %>%
  mutate(
    associationism = recode(associationism, `1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1)
  )
# Distribution after recoding.
table(full_data[["associationism"]])
```

## Religious participation

```{r}
# Inspect the distribution only; this variable is not recoded.
table(full_data[["religious_part"]])
```

## Books at home

```{r}
# Inspect the distribution only; this variable is not recoded.
table(full_data[["book_home"]])
```

## Mother's education

This variable is not in Hochman & García-Albacete (2019). It shows only the highest level of education attained.

Only answers for wave 1.

```{r}
# Distributions of the three mother's-education indicators used below.
table(full_data[["mother_primary"]])
table(full_data[["mother_secondary"]])
table(full_data[["mother_tertiary"]])
```

```{r}
# Highest education level of the mother, derived from the three indicators.
# The branches below treat code 1 as "completed" and code 2 as "not
# completed". Branches are checked from the highest level down; any
# combination not covered (including missing indicators) becomes NA.
full_data <- full_data %>%
  mutate(
    mother_educ = case_when(
      mother_tertiary == 1 ~ "Tertiary education",
      mother_tertiary == 2 & mother_secondary == 1 ~ "Secondary education",
      mother_tertiary == 2 & mother_secondary == 2 & mother_primary == 1 ~ "Primary education",
      mother_tertiary == 2 & mother_secondary == 2 & mother_primary == 2 ~ "No studies",
      .default = NA
    )
  )
table(full_data[["mother_educ"]])
```

## Father's education

This variable is not in Hochman & García-Albacete (2019). It shows only the highest level of education attained.

Only answers for wave 1.

```{r}
# Distributions of the three father's-education indicators used below.
table(full_data[["father_primary"]])
table(full_data[["father_secondary"]])
table(full_data[["father_tertiary"]])
```

```{r}
# Highest education level of the father, derived from the three indicators.
# The branches below treat code 1 as "completed" and code 2 as "not
# completed". Branches are checked from the highest level down; any
# combination not covered (including missing indicators) becomes NA.
full_data <- full_data %>%
  mutate(
    father_educ = case_when(
      father_tertiary == 1 ~ "Tertiary education",
      father_tertiary == 2 & father_secondary == 1 ~ "Secondary education",
      father_tertiary == 2 & father_secondary == 2 & father_primary == 1 ~ "Primary education",
      father_tertiary == 2 & father_secondary == 2 & father_primary == 2 ~ "No studies",
      .default = NA
    )
  )
table(full_data[["father_educ"]])
```

### Parents education

```{r}
# Highest education level across both parents.
# For the three upper levels it is enough that either parent reaches the
# level; "No studies" requires both parents to be coded "No studies", so a
# row with one "No studies" parent and one missing parent stays NA.
full_data <- full_data %>%
  mutate(
    parent_educ = case_when(
      mother_educ == "Tertiary education" | father_educ == "Tertiary education" ~
        "Tertiary education",
      mother_educ == "Secondary education" | father_educ == "Secondary education" ~
        "Secondary education",
      mother_educ == "Primary education" | father_educ == "Primary education" ~
        "Primary education",
      mother_educ == "No studies" & father_educ == "No studies" ~
        "No studies",
      .default = NA
    )
  )
table(full_data[["parent_educ"]])
```

## Age

```{r}
# Age at each wave.
#
# year_born is only selected from the wave-1 files, so the wave-1 age
# (2010 - year_born) is computed on the wave-1 row, copied to every row of the
# same respondent, and then increased by one per later wave. Respondents
# without a usable wave-1 value get NA in all waves.
#
# Everything happens in a single grouped mutate() and the result is ungrouped
# afterwards, so later steps do not silently run once per respondent.
full_data <- full_data %>%
  group_by(youthid) %>%
  mutate(
    base_age = if_else(wave == 1, 2010 - year_born, NA_real_),
    # first() of an empty vector is NA, which covers respondents with no
    # wave-1 value.
    base_age = first(base_age[!is.na(base_age)]),
    age = case_when(
      wave == 1 ~ base_age,
      wave == 2 ~ base_age + 1,
      wave == 3 ~ base_age + 2,
      TRUE ~ NA
    )
  ) %>%
  ungroup()
table(full_data$age)
```

## Gender roles

### Gender care

```{r}
# Distribution before recoding.
table(full_data[["gender_care"]])
# Dummy: code 2 becomes 1; codes 1 and 3 become 0; anything else NA.
full_data <- full_data %>%
  mutate(
    gender_care = case_when(
      gender_care == 2 ~ 1,
      gender_care %in% c(1, 3) ~ 0
    )
  )
# Distribution after recoding.
table(full_data[["gender_care"]])
```

### Gender cook

```{r}
# Distribution before recoding.
table(full_data[["gender_cook"]])
# Dummy: code 2 becomes 1; codes 1 and 3 become 0; anything else NA.
full_data <- full_data %>%
  mutate(
    gender_cook = case_when(
      gender_cook == 2 ~ 1,
      gender_cook %in% c(1, 3) ~ 0
    )
  )
# Distribution after recoding.
table(full_data[["gender_cook"]])
```

### Gender money

```{r}
# Distribution before recoding.
table(full_data[["gender_money"]])
# Dummy: code 1 becomes 1; codes 2 and 3 become 0; anything else NA.
# Unlike the other three gender-role items, the value coded 1 here is 1, not 2.
full_data <- full_data %>%
  mutate(
    gender_money = case_when(
      gender_money == 1 ~ 1,
      gender_money %in% c(2, 3) ~ 0
    )
  )
# Distribution after recoding.
table(full_data[["gender_money"]])
```

### Gender clean

```{r}
# Distribution before recoding.
table(full_data[["gender_clean"]])
# Dummy: code 2 becomes 1; codes 1 and 3 become 0; anything else NA.
full_data <- full_data %>%
  mutate(
    gender_clean = case_when(
      gender_clean == 2 ~ 1,
      gender_clean %in% c(1, 3) ~ 0
    )
  )
# Distribution after recoding.
table(full_data[["gender_clean"]])
```

### Gender roles

```{r}
# Additive index of the four gender-role dummies (range 0-4).
# A plain sum is used, so the index is NA as soon as one item is NA.
full_data <- full_data %>%
  mutate(
    gender_roles = gender_care + gender_cook + gender_money + gender_clean
  )
table(full_data[["gender_roles"]])
```

# Pooled data

## Only wave 2

```{r}
# Build the wave-2 analysis file.
#
# book_home, nationality and gender_roles can be missing on the wave-2 row, so
# a missing wave-2 value is filled with the same respondent's wave-1 value
# (NA if the respondent has no wave-1 row). Only wave-2 rows are kept.
#
# The grouping by respondent is only needed for the fill step and is dropped
# afterwards, so later steps on data_wave2 do not run once per respondent.
data_wave2 <- full_data %>%
  group_by(youthid) %>%
  mutate(
    book_home = ifelse(
      is.na(book_home) & wave == 2,
      first(book_home[wave == 1]),
      book_home
    ),
    nationality = ifelse(
      is.na(nationality) & wave == 2,
      first(nationality[wave == 1]),
      nationality
    ),
    gender_roles = ifelse(
      is.na(gender_roles) & wave == 2,
      first(gender_roles[wave == 1]),
      gender_roles
    )
  ) %>%
  ungroup() %>%
  filter(wave == 2)
```

# Select variables of interest

```{r}
# Keep only the identifier, the outcome and the covariates used in the models.
data_wave2 <- data_wave2 %>%
  dplyr::select(
    youthid,
    wave,
    poliint,
    country,
    female,
    immi_background_rec,
    country_origin,
    nationality,
    age,
    book_home,
    parent_educ,
    talk_parents_pol,
    associationism,
    religious_part,
    mixed_friend,
    gender_roles
  )
```

## Another combined variable for the alternative model

```{r}
# Six-category factor crossing gender (female 0/1) with the three-category
# immigrant background. Rows with a missing value on either variable are NA.
# "Native_Male" is the first level of the factor.
data_wave2 <- data_wave2 %>%
  mutate(
    combi_fem_migr = factor(
      case_when(
        female == 1 & immi_background_rec == "Immigrant" ~ "Immigrant_Female",
        female == 0 & immi_background_rec == "Immigrant" ~ "Immigrant_Male",
        female == 1 & immi_background_rec == "Child of immigrants" ~ "Child_of_Immigrants_Female",
        female == 0 & immi_background_rec == "Child of immigrants" ~ "Child_of_Immigrants_Male",
        female == 1 & immi_background_rec == "Other/Native" ~ "Native_Female",
        female == 0 & immi_background_rec == "Other/Native" ~ "Native_Male",
        TRUE ~ NA
      ),
      levels = c(
        "Native_Male",
        "Native_Female",
        "Immigrant_Male",
        "Child_of_Immigrants_Male",
        "Immigrant_Female",
        "Child_of_Immigrants_Female"
      )
    )
  )
summary(data_wave2[["combi_fem_migr"]])
```

## Change immigrant background categories

```{r}
# Distribution of the three-category variable.
table(data_wave2[["immi_background_rec"]])
# Two-category version: "Immigrant" and "Child of immigrants" are merged into
# "Immigrant"; "Native" is the first level. Missing values stay NA.
data_wave2 <- data_wave2 %>%
  mutate(
    immi_background_dic = factor(
      case_when(
        immi_background_rec == "Other/Native" ~ "Native",
        immi_background_rec %in% c("Immigrant", "Child of immigrants") ~ "Immigrant"
      ),
      levels = c("Native", "Immigrant")
    )
  )
table(data_wave2[["immi_background_dic"]])
```


# Export data

```{r}
# Save the wave-2 analysis file as CSV in the current working directory.
write_csv(x = data_wave2, file = "data_wave2.csv")
```

